Now that we have a database to save to, we need to create the data that goes in it. We will run the CCP simulation with a variety of parameters and save the results to the database. We'll start small here to get an idea of how much data we are creating.


In [1]:
import sqlite3
from numpy.random import beta
from numpy import arange

In [2]:
#open the SQLite database file that will hold the simulation results
conn = sqlite3.connect('ICCP-database.sqlite')
#cursor used to execute the SQL statements in the cells below
c = conn.cursor()

In [3]:
def single_run_looped(n, a, b):
    """
    This is a single run of the CCP, simulated until every coupon has been drawn.

    Each draw samples rv ~ Beta(a, b) and maps it onto one of n equal-width
    bins of [0, 1]; a == b == 1 therefore gives uniform coupon probabilities.

    n = number of unique coupons (must be at least 1)
    a = first shape parameter of the beta distribution
    b = second shape parameter of the beta distribution

    Returns a list with one entry per draw: entry i (0-based) is the number of
    distinct coupons seen after i+1 draws. The length of the list is the total
    number of draws needed and its last entry is n.

    Raises ValueError if n < 1, because the stopping condition could never be met.
    """
    if n < 1:
        raise ValueError("n must be at least 1")
    cdf = (arange(n)+1.0)/n #upper edge of each coupon's probability bin
    seen = set() #coupons drawn so far (a set keeps the membership test O(1))
    uniques = [] #running count of distinct coupons after each draw
    while True:
        rv = beta(a, b) #randomness that decides which coupon to draw
        draw = int((cdf>rv).sum()) #number of bin edges above rv identifies the coupon
        seen.add(draw) #no effect if this coupon was already collected
        uniques.append(len(seen)) #store the info
        if len(seen)==n: #we'll stop once we have drawn all our coupons
            return uniques #the full unique-count history is the data we want to record

In [4]:
#rebuild both tables from scratch so every run of this cell starts empty;
#draws is dropped first because it references sim through its foreign key
schema_statements = (
    'drop table if exists draws;',
    'drop table if exists sim;',
    'create table if not exists sim(sim_num integer primary key, n integer, param real, max_draw integer);',
    'create table if not exists draws(sim_num integer, draw_num integer, uniques integer, foreign key(sim_num) references sim(sim_num), primary key(sim_num, draw_num));',
)
for statement in schema_statements:
    c.execute(statement)
conn.commit()

sim_num = 0 #running id shared by the sim and draws tables
for n in arange(1,76,1): #number of unique coupons
    for param in arange(1, 2): #beta shape parameter, used for both a and b
        for sim in range(100): #repeat each configuration 100 times
            sim_num = sim_num + 1
            uniques = single_run_looped(n, param, param)
            #one (sim_num, draw_num, uniques) row per draw; draw_num starts at 1
            draw_rows = [(sim_num, draw_num, count) for draw_num, count in enumerate(uniques, 1)]
            #bind the values instead of formatting them into the SQL text; the numpy
            #scalars are converted to plain Python numbers so sqlite3 can bind them
            c.execute('insert into sim(sim_num, n, param, max_draw) values (?,?,?,?);', (sim_num, int(n), float(param), len(uniques)))
            c.executemany('insert into draws(sim_num, draw_num, uniques) values (?,?,?);', draw_rows)

In [5]:
#commit the pending inserts, then close the database connection
conn.commit()
conn.close()